importation of the libraries

library(sp)
## Warning: package 'sp' was built under R version 4.5.2
library(ncdf4, warn.conflicts = FALSE)
## Warning: package 'ncdf4' was built under R version 4.5.2
library(ggplot2, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
library(raster, warn.conflicts = FALSE)
## Warning: package 'raster' was built under R version 4.5.2
### I used warn.conflicts = FALSE because when i ran the chunk above there are massages that appear like "## The following objects are masked from 'package:stats':" ###

Now i will show a brief example of my data visualization for one file (january)

nc_file <- nc_open("C:\\Users\\azizj\\Desktop\\finalrproject\\ncfile\\PREmm20220101000000120IMPGS01GL.nc")
var_names <- names(nc_file$var)
print("Available variables:")
## [1] "Available variables:"
print(var_names)
##  [1] "time_bnds"        "lat_bnds"         "lon_bnds"         "record_status"   
##  [5] "precipitation"    "num_obs_fraction" "num_obs_rate"     "num_days"        
##  [9] "quality_flag"     "num_days_snow"
lon <- ncvar_get(nc_file, "lon_bnds")
lat <- ncvar_get(nc_file, "lat_bnds")
time <- ncvar_get(nc_file, "time_bnds")
precipitation <- ncvar_get(nc_file, "precipitation")
num_obs_fraction <- ncvar_get(nc_file, "num_obs_fraction")
num_obs_rate <- ncvar_get(nc_file, "num_obs_rate")
num_days <- ncvar_get(nc_file, "num_days")
quality_flag <- ncvar_get(nc_file, "quality_flag")
#head(precipitation)
head(time)
## [1] 694310400 696988800

After reviewing the variables ppresent in my file , now we can plot them to understand them

precip_raster <- raster(t(precipitation), 
                        xmn = min(lon), xmx = max(lon),
                        ymn = min(lat), ymx = max(lat))
precip_raster
## class      : RasterLayer 
## dimensions : 180, 360, 64800  (nrow, ncol, ncell)
## resolution : 1, 1  (x, y)
## extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
## crs        : NA 
## source     : memory
## names      : layer 
## values     : 0, 30.86  (min, max)
# Plot precipitation map
plot(precip_raster, main = "January Precipitation (mm)/day",
     xlab = "Longitude", ylab = "Latitude")
maps::map('world', add = TRUE, col = "gray50")

# Prepare data for ggplot
precip_df <- as.data.frame(precip_raster, xy = TRUE)
colnames(precip_df) <- c("lon", "lat", "precipitation")
# Histogram of precipitation values
precip_vector <- as.vector(precipitation)
precip_vector <- precip_vector[!is.na(precip_vector)]

hist(precip_vector, breaks = 50, col = "lightblue",
     main = "Distribution of Precipitation Values",
     xlab = "Precipitation (mm)", ylab = "Frequency")

max(precip_vector)
## [1] 30.86
min(precip_vector)
## [1] 0

We can see the frequencies of precipitation per day !

example of a region (Europe)

europe_bbox <- extent(-10, 40, 35, 70)  # xmin, xmax, ymin, ymax
precip_europe <- crop(precip_raster, europe_bbox)

plot(precip_europe, main = "Precipitation/day - Europe in January")
maps::map('world', add = TRUE, col = "gray50")

# Missing values with % in every file

nc_files <- list.files(pattern = "\\.nc$", full.names = TRUE)

# Check NA and duplicates
cat("File Check Results:\n")
## File Check Results:
cat("===================\n")
## ===================
for (file in nc_files) {
  nc <- nc_open(file)
  precip <- ncvar_get(nc, "precipitation")
  nc_close(nc)
  
  na_pct <- round(sum(is.na(precip)) / length(precip) * 100, 1)
  cat(basename(file), "-> NA:", na_pct, "%\n")
}
## PREmm20220101000000120IMPGS01GL.nc -> NA: 1.1 %
## PREmm20220201000000120IMPGS01GL.nc -> NA: 0.9 %
## PREmm20220301000000120IMPGS01GL.nc -> NA: 1 %
## PREmm20220401000000120IMPGS01GL.nc -> NA: 1 %
## PREmm20220501000000120IMPGS01GL.nc -> NA: 1.1 %
## PREmm20220601000000120IMPGS01GL.nc -> NA: 1.1 %
## PREmm20220701000000120IMPGS01GL.nc -> NA: 1 %
## PREmm20220801000000120IMPGS01GL.nc -> NA: 1 %
## PREmm20220901000000120IMPGS01GL.nc -> NA: 1.1 %
## PREmm20221001000000120IMPGS01GL.nc -> NA: 1 %
## PREmm20221101000000120IMPGS01GL.nc -> NA: 1 %
## PREmm20221201000000120IMPGS01GL.nc -> NA: 1 %

Duplicates

cat("\nDuplicate Check:\n")
## 
## Duplicate Check:
cat("================\n")
## ================
hashes <- c()
for (file in nc_files) {
  nc <- nc_open(file)
  precip <- ncvar_get(nc, "precipitation")
  nc_close(nc)
  hash <- paste(dim(precip), sum(precip, na.rm = TRUE), sep = "-")
  hashes <- c(hashes, hash)
}

if (any(duplicated(hashes))) {
  dup_indices <- which(duplicated(hashes) | duplicated(hashes, fromLast = TRUE))
  cat(" Possible duplicates:\n")
  for (i in dup_indices) {
    cat("  ", basename(nc_files[i]), "\n")
  }
} else {
  cat("✓ No duplicates detected\n")
}
## ✓ No duplicates detected
#Satellite data often has 1-5% NA values - this is normal!

#Missing data usually occurs over:

  #High latitudes (sensor limitations)

  #Desert regions (low signal)

  #Coastlines (mixed pixels)

The purpose for the work now is to understand the mean precipitation per day in the world in form of graphs for the 12 months in 2022

# Now we are going to do the work for all the months
library(purrr, warn.conflicts = FALSE)
library(stringr, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
## Warning: package 'lubridate' was built under R version 4.5.2
library(reshape2, warn.conflicts = FALSE)
## Warning: package 'reshape2' was built under R version 4.5.2
library(maps, warn.conflicts = FALSE)
## Warning: package 'maps' was built under R version 4.5.2
library(mapdata, warn.conflicts = FALSE)
## Warning: package 'mapdata' was built under R version 4.5.2

I fixed the path so i can work without problems of file access

setwd("C:/Users/azizj/Desktop/finalrproject/ncfile")  # Change this to your actual path

# 1. Identify all NetCDF files
nc_files <- list.files(pattern = "\\.nc$", full.names = TRUE)
print(basename(nc_files))
##  [1] "PREmm20220101000000120IMPGS01GL.nc" "PREmm20220201000000120IMPGS01GL.nc"
##  [3] "PREmm20220301000000120IMPGS01GL.nc" "PREmm20220401000000120IMPGS01GL.nc"
##  [5] "PREmm20220501000000120IMPGS01GL.nc" "PREmm20220601000000120IMPGS01GL.nc"
##  [7] "PREmm20220701000000120IMPGS01GL.nc" "PREmm20220801000000120IMPGS01GL.nc"
##  [9] "PREmm20220901000000120IMPGS01GL.nc" "PREmm20221001000000120IMPGS01GL.nc"
## [11] "PREmm20221101000000120IMPGS01GL.nc" "PREmm20221201000000120IMPGS01GL.nc"
# 2. Function to extract month from filename
extract_month <- function(filename) {
  # Assuming filenames contain month information like "202201", "202202", etc.
  month_str <- str_extract(basename(filename), "2022[0-9]{2}")
  if (!is.na(month_str)) {
    return(as.numeric(substr(month_str, 5, 6)))
  } else {
    # Alternative: extract from time coverage in file
    return(NA)
  }
}
extract_month("PREmm20221001000000120IMPGS01GL.nc")
## [1] 10
extract_month("PREmm20220101000000120IMPGS01GL.nc")
## [1] 1
extract_month("PREmm20220601000000120IMPGS01GL.nc")
## [1] 6

Now we are going to make a function to process a single file

# 3. Main function to process a single NetCDF file
process_nc_file <- function(file_path) {
  cat("Processing:", basename(file_path), "\n")
  
  # Open NetCDF file
  nc_file <- nc_open(file_path)
  
  # Extract basic information
  tryCatch({
    # Read dimensions
    lon <- ncvar_get(nc_file, "lon")
    lat <- ncvar_get(nc_file, "lat")
    time_val <- ncvar_get(nc_file, "time")
    
    # Read precipitation data
    precipitation <- ncvar_get(nc_file, "precipitation")
    
    # Get time information and convert to date
    time_units <- ncatt_get(nc_file, "time", "units")$value
    if (grepl("since 2000", time_units)) {
      date_actual <- as.POSIXct(time_val, origin = "2000-01-01", tz = "UTC")
    } else if (grepl("since 1970", time_units)) {
      date_actual <- as.POSIXct(time_val, origin = "1970-01-01", tz = "UTC")
    } else {
      date_actual <- NA
    }
    
    # Extract month from date
    if (!is.na(date_actual)) {
      month_num <- as.numeric(format(date_actual, "%m"))
      month_name <- format(date_actual, "%B %Y")
    } else {
      # Try to extract from filename as fallback
      month_num <- extract_month(file_path)
      month_name <- ifelse(!is.na(month_num), 
                           paste(month.name[month_num], "2022"), 
                           "Unknown Month")
    }
    
    # Calculate statistics
    precip_vector <- as.vector(precipitation)
    precip_vector <- precip_vector[!is.na(precip_vector)]
    
    stats <- list(
      file = basename(file_path),
      month = month_num,
      month_name = month_name,
      date = ifelse(!is.na(date_actual), as.character(date_actual[1]), NA),
      mean_precip = mean(precip_vector, na.rm = TRUE),
      median_precip = median(precip_vector, na.rm = TRUE),
      max_precip = max(precip_vector, na.rm = TRUE),
      min_precip = min(precip_vector, na.rm = TRUE)
      
    )
    
    # Create raster object for spatial analysis
    precip_raster <- raster(t(precipitation), 
                            xmn = min(lon), xmx = max(lon),
                            ymn = min(lat), ymx = max(lat))
    
    # Return results
    result <- list(
      stats = stats,
      raster = precip_raster,
      matrix = precipitation,
      lon = lon,
      lat = lat
    )
    
    return(result)
    
  }, error = function(e) {
    cat("Error processing", basename(file_path), ":", e$message, "\n")
    return(NULL)
  }, finally = {
    nc_close(nc_file)
  })
}

# 4. Process all files
cat("\n=== Processing all files ===\n")
## 
## === Processing all files ===
all_results <- purrr::map(nc_files, process_nc_file)
## Processing: PREmm20220101000000120IMPGS01GL.nc 
## Processing: PREmm20220201000000120IMPGS01GL.nc 
## Processing: PREmm20220301000000120IMPGS01GL.nc 
## Processing: PREmm20220401000000120IMPGS01GL.nc 
## Processing: PREmm20220501000000120IMPGS01GL.nc 
## Processing: PREmm20220601000000120IMPGS01GL.nc 
## Processing: PREmm20220701000000120IMPGS01GL.nc 
## Processing: PREmm20220801000000120IMPGS01GL.nc 
## Processing: PREmm20220901000000120IMPGS01GL.nc 
## Processing: PREmm20221001000000120IMPGS01GL.nc 
## Processing: PREmm20221101000000120IMPGS01GL.nc 
## Processing: PREmm20221201000000120IMPGS01GL.nc
# Remove any failed processing
all_results <- compact(all_results)
# 5. Extract statistics and create summary data frame
stats_list <- purrr::map(all_results, "stats")
summary_df <- bind_rows(stats_list) %>%
  arrange(month)

cat("\n=== Summary Statistics ===\n")
## 
## === Summary Statistics ===
print(summary_df)
## # A tibble: 12 × 8
##    file   month month_name date  mean_precip median_precip max_precip min_precip
##    <chr>  <dbl> <chr>      <chr>       <dbl>         <dbl>      <dbl>      <dbl>
##  1 PREmm…     1 January 2… 2022…        2.12         0.615       30.9          0
##  2 PREmm…     2 February … 2022…        2.13         0.600       40.0          0
##  3 PREmm…     3 March 2022 2022…        2.13         0.730       35.0          0
##  4 PREmm…     4 April 2022 2022…        2.10         0.850       36.2          0
##  5 PREmm…     5 May 2022   2022…        2.14         0.980       33.9          0
##  6 PREmm…     6 June 2022  2022…        2.14         1.10        39.1          0
##  7 PREmm…     7 July 2022  2022…        2.22         0.900       36.4          0
##  8 PREmm…     8 August 20… 2022…        2.18         1.01        40.8          0
##  9 PREmm…     9 September… 2022…        2.18         0.950       39.1          0
## 10 PREmm…    10 October 2… 2022…        2.01         0.770       34.2          0
## 11 PREmm…    11 November … 2022…        2.06         0.690       34.1          0
## 12 PREmm…    12 December … 2022…        2.12         0.700       30.9          0

The data frame above summaries the mean_precipitation per day all over the world for every month , the max_precipitation and the min

## Plot 2: Time series of monthly statistics
summary_df_long <- summary_df %>%
  dplyr::select(month, mean_precip, median_precip, max_precip) %>%
  melt(id.vars = "month")

ggplot(summary_df_long, aes(x = month, y = value, color = variable)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(title = "Precipitation Statistics by Month - 2022",
       x = "Month", y = "Precipitation (mm)",
       color = "Statistic") +
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Function to plot individual month
plot_monthly_map <- function(result, index) {
  precip_raster <- result$raster
  month_name <- result$stats$month_name
  
  # Convert to data frame for ggplot
  precip_df <- as.data.frame(precip_raster, xy = TRUE)
  colnames(precip_df) <- c("lon", "lat", "precipitation")
  
  ggplot(precip_df, aes(x = lon, y = lat, fill = precipitation)) +
    geom_tile() +
    borders("world", colour = "black", fill = NA, size = 0.2) +
    scale_fill_viridis_c(option = "plasma", 
                         name = "Precipitation (mm)",
                         limits = c(0, max(summary_df$max_precip))) +
    coord_quickmap() +
    labs(title = paste("Precipitation/day -", month_name),
         x = "Longitude", y = "Latitude") +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5))
}
monthly_plots <- map2(all_results, seq_along(all_results), plot_monthly_map)
## Warning: `borders()` was deprecated in ggplot2 4.0.0.
## ℹ Please use `annotation_borders()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display all plots
print(monthly_plots[[1]])

print(monthly_plots[[2]])

print(monthly_plots[[3]])

print(monthly_plots[[4]])

print(monthly_plots[[5]])

print(monthly_plots[[6]])

print(monthly_plots[[7]])

print(monthly_plots[[8]])

print(monthly_plots[[9]])

print(monthly_plots[[10]])

print(monthly_plots[[11]])

print(monthly_plots[[12]])

Annual percipitation

# Extract all rasters
all_rasters <- purrr::map(all_results, "raster")

# Create annual mean (if all rasters have same extent)
if (length(all_rasters) > 0) {
  annual_mean <- mean(stack(all_rasters), na.rm = TRUE)
  
  # Plot annual mean
  annual_df <- as.data.frame(annual_mean, xy = TRUE)
  colnames(annual_df) <- c("lon", "lat", "precipitation")
  
  ggplot(annual_df, aes(x = lon, y = lat, fill = precipitation)) +
    geom_tile() +
    borders("world", colour = "black", fill = NA) +
    scale_fill_viridis_c(option = "plasma", name = "Annual Mean\nPrecipitation (mm)") +
    coord_quickmap() +
    labs(title = "Annual Mean Precipitation/day - 2022",
         x = "Longitude", y = "Latitude") +
    theme_minimal()
}

### Let’s take some continents as example (Europe and africa)

# 9. Regional analysis example (Europe)
# Europe bounding box
europe_bbox <- extent(-10, 40, 35, 70)

# Create monthly plots for Europe
plot_europe_monthly <- function(result, index) {
  precip_raster <- result$raster
  month_name <- result$stats$month_name
  
  precip_europe <- crop(precip_raster, europe_bbox)
  precip_df <- as.data.frame(precip_europe, xy = TRUE)
  colnames(precip_df) <- c("lon", "lat", "precipitation")
  
  ggplot(precip_df, aes(x = lon, y = lat, fill = precipitation)) +
    geom_tile() +
    borders("world", colour = "black", fill = NA, size = 0.2) +
    scale_fill_viridis_c(option = "turbo", name = "Precipitation (mm)") +
    coord_quickmap(xlim = c(-10, 40), ylim = c(35, 70)) +
    labs(title = paste("Europe mean Precipitation/day -", month_name),
         x = "Longitude", y = "Latitude") +
    theme_minimal()
}

# Create European maps
europe_plots <- map2(all_results, seq_along(all_results), plot_europe_monthly)
europe_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## There are other libraries that we can work with to create cartographics , like the ones we’ll use now

library(sf, warn.conflicts = FALSE)
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(rnaturalearth, warn.conflicts = FALSE)
## Warning: package 'rnaturalearth' was built under R version 4.5.2
library(rnaturalearthdata, warn.conflicts = FALSE)
## Warning: package 'rnaturalearthdata' was built under R version 4.5.2
 africa_sf <- rnaturalearth::ne_countries(continent = 'africa', returnclass = "sf")
world_sf <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf")

# Define Africa bounding box 
africa_bbox <- raster::extent(-25, 60, -40, 40)

# function for Africa maps
create_africa_map_improved <- function(result) {
  precip_raster <- result$raster
  month_name <- result$stats$month_name
  
  # Crop to Africa region 
  precip_africa <- raster::crop(precip_raster, africa_bbox)
  precip_africa <- raster::mask(precip_africa, as(africa_sf, "Spatial"))
  
  # Convert to data frame
  precip_df <- as.data.frame(precip_africa, xy = TRUE)
  colnames(precip_df) <- c("lon", "lat", "precipitation")
  precip_df <- precip_df[!is.na(precip_df$precipitation), ]
  
  # Create the plot
  ggplot() +
    # Precipitation data
    geom_tile(data = precip_df, aes(x = lon, y = lat, fill = precipitation)) +
    # Africa boundaries
    geom_sf(data = africa_sf, fill = NA, color = "black", size = 0.3) +
    # World context
    geom_sf(data = world_sf, fill = NA, color = "gray50", size = 0.1) +
    # Color scale
    scale_fill_viridis_c(
      option = "turbo", 
      name = "Precipitation\n(mm)",
      limits = c(0, max(summary_df$max_precip, na.rm = TRUE)),
      na.value = NA
    ) +
    # Proper coordinate system and limits
    coord_sf(
      xlim = c(-25, 60), 
      ylim = c(-40, 40),
      expand = FALSE,
      crs = st_crs(4326)
    ) +
    labs(
      title = paste("Africa Precipitation/day -", month_name),
      subtitle = "Monthly precipitation distribution",
      x = "Longitude", 
      y = "Latitude"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      panel.background = element_rect(fill = "lightblue", colour = NA),
      panel.grid = element_line(color = "gray80", size = 0.2)
    )
}

# Create improved Africa maps for first 3 months
africa_improved_plots <- purrr::map(all_results[1:3], create_africa_map_improved)
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display improved plots
africa_improved_plots[[1]]

africa_improved_plots[[2]]

africa_improved_plots[[3]]

## The puspose of our study now is to analyse the total of precipitation monthly

calculate_monthly_total <- function(result, month_num) {
  cat("Processing month", month_num, "\n")
  
  # Your raster contains daily mean precipitation (mm/day)
  daily_mean_raster <- result$raster
  
  # Get number of days in the specific month
  year <- 2022
  days_in_month <- lubridate::days_in_month(as.Date(paste0(year, "-", 
                                                          sprintf("%02d", month_num), "-01")))
  
  # Calculate monthly total: daily mean × number of days
  monthly_total_raster <- daily_mean_raster * days_in_month
  
  # Set proper name
  names(monthly_total_raster) <- paste0("Month_", month_num, "_Total")
  
  return(list(
    raster = monthly_total_raster,
    daily_mean = daily_mean_raster,
    days_in_month = days_in_month,
    month_num = month_num,
    month_name = month.name[month_num]
  ))
}

# Calculate monthly totals for all months
monthly_totals_list <- purrr::map2(all_results, summary_df$month, calculate_monthly_total)
## Processing month 1 
## Processing month 2 
## Processing month 3 
## Processing month 4 
## Processing month 5 
## Processing month 6 
## Processing month 7 
## Processing month 8 
## Processing month 9 
## Processing month 10 
## Processing month 11 
## Processing month 12
# Extract the total rasters
monthly_total_rasters <- purrr::map(monthly_totals_list, "raster")

# Create a summary data frame of monthly totals
monthly_total_stats <- purrr::map_dfr(monthly_totals_list, function(x) {
  values <- raster::values(x$raster)
  values <- values[!is.na(values)]
  
  data.frame(
    month = x$month_num,
    month_name = x$month_name,
    days_in_month = x$days_in_month,
    monthly_total_mean = mean(values, na.rm = TRUE),
    monthly_total_median = median(values, na.rm = TRUE),
    monthly_total_max = max(values, na.rm = TRUE),
    monthly_total_min = min(values, na.rm = TRUE),
    monthly_total_sd = sd(values, na.rm = TRUE)
  )
})

cat("\n=== MONTHLY PRECIPITATION TOTALS ===\n")
## 
## === MONTHLY PRECIPITATION TOTALS ===
print(monthly_total_stats)
##     month month_name days_in_month monthly_total_mean monthly_total_median
## Jan     1    January            31           65.65427               19.065
## Feb     2   February            28           59.51716               16.800
## Mar     3      March            31           65.91723               22.630
## Apr     4      April            30           63.06270               25.500
## May     5        May            31           66.38053               30.380
## Jun     6       June            30           64.31141               33.000
## Jul     7       July            31           68.68429               27.900
## Aug     8     August            31           67.58953               31.310
## Sep     9  September            30           65.33686               28.500
## Oct    10    October            31           62.43323               23.870
## Nov    11   November            30           61.68747               20.700
## Dec    12   December            31           65.56735               21.700
##     monthly_total_max monthly_total_min monthly_total_sd
## Jan            956.66                 0        107.87773
## Feb           1119.72                 0        101.36025
## Mar           1084.38                 0        100.06061
## Apr           1084.50                 0         98.89566
## May           1049.97                 0        105.66947
## Jun           1173.00                 0         97.14582
## Jul           1127.16                 0        108.97088
## Aug           1264.18                 0        105.67906
## Sep           1173.30                 0        103.56411
## Oct           1058.65                 0        100.12810
## Nov           1023.30                 0         96.66007
## Dec            957.28                 0        102.08184

16. Visualize Monthly Precipitation Totals

# 1. Bar plot of monthly totals
ggplot(monthly_total_stats, aes(x = factor(month, labels = month.abb), 
                                y = monthly_total_mean)) +
  geom_bar(stat = "identity", fill = "darkgreen", alpha = 0.7) +
  geom_text(aes(label = round(monthly_total_mean, 1)), 
            vjust = -0.5, size = 3) +
  labs(title = "Monthly Precipitation Totals - 2022",
       subtitle = "Total precipitation per month (mm)",
       x = "Month", y = "Total Precipitation (mm)") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# 2. Compare daily mean vs monthly total for a specific location
# Let's take a point (e.g., London: 0° lon, 51° lat)
london_point <- data.frame(lon = 0, lat = 51)

extract_precipitation_series <- function(month_totals, point) {
  purrr::map_dfr(month_totals, function(x) {
    daily_mean <- raster::extract(x$daily_mean, point)
    monthly_total <- raster::extract(x$raster, point)
    
    data.frame(
      month = x$month_num,
      month_name = x$month_name,
      daily_mean_mm = daily_mean,
      monthly_total_mm = monthly_total
    )
  })
}

london_precip <- extract_precipitation_series(monthly_totals_list, london_point)

ggplot(london_precip, aes(x = factor(month, labels = month.abb))) +
  geom_bar(aes(y = monthly_total_mm, fill = "Monthly Total"), 
           stat = "identity", alpha = 0.7) +
  geom_point(aes(y = daily_mean_mm * 30, color = "Daily Mean × 30"), 
             size = 3) +
  scale_fill_manual(values = c("Monthly Total" = "steelblue")) +
  scale_color_manual(values = c("Daily Mean × 30" = "red")) +
  labs(title = "London Precipitation - 2022",
       subtitle = "Monthly totals vs scaled daily means",
       x = "Month", y = "Precipitation (mm)",
       fill = "", color = "") +
  theme_minimal() +
  theme(legend.position = "top")

17. Spatial Maps of Monthly Totals

# Function to plot monthly total maps
plot_monthly_total_map <- function(month_total) {
  precip_df <- as.data.frame(month_total$raster, xy = TRUE)
  colnames(precip_df) <- c("lon", "lat", "precipitation")
  precip_df <- precip_df[!is.na(precip_df$precipitation), ]
  
  ggplot() +
    geom_tile(data = precip_df, aes(x = lon, y = lat, fill = precipitation)) +
    borders("world", colour = "black", fill = NA, size = 0.2) +
    scale_fill_viridis_c(
      option = "turbo", 
      name = "Monthly\nTotal (mm)",
      limits = c(0, max(monthly_total_stats$monthly_total_max, na.rm = TRUE))
    ) +
    coord_quickmap() +
    labs(
      title = paste("Monthly Precipitation Total -", month_total$month_name),
      subtitle = paste("Total:", round(mean(precip_df$precipitation, na.rm = TRUE), 1), "mm"),
      x = "Longitude", 
      y = "Latitude"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
}

# Plot first 3 months' totals
monthly_total_maps <- purrr::map(monthly_totals_list[1:9], plot_monthly_total_map)
monthly_total_maps[[1]]

monthly_total_maps[[2]]

monthly_total_maps[[3]]

monthly_total_maps[[4]]

monthly_total_maps[[5]]

monthly_total_maps[[6]]

monthly_total_maps[[7]]

monthly_total_maps[[8]]

monthly_total_maps[[9]]

## Annual Precipitation Total

# Calculate annual total by summing monthly totals
if (length(monthly_total_rasters) == 12) {
  annual_total_raster <- sum(raster::stack(monthly_total_rasters), na.rm = TRUE)
  
  # Plot annual total
  annual_total_df <- as.data.frame(annual_total_raster, xy = TRUE)
  colnames(annual_total_df) <- c("lon", "lat", "annual_precipitation")
  
  ggplot(annual_total_df, aes(x = lon, y = lat, fill = annual_precipitation)) +
    geom_tile() +
    borders("world", colour = "black", fill = NA, size = 0.2) +
    scale_fill_viridis_c(
      option = "plasma", 
      name = "Annual\nTotal (mm)",
      limits = c(0, max(annual_total_df$annual_precipitation, na.rm = TRUE))
    ) +
    coord_quickmap() +
    labs(
      title = "Annual Precipitation Total - 2022",
      subtitle = "Sum of monthly precipitation totals",
      x = "Longitude", 
      y = "Latitude"
    ) +
    theme_minimal()
  
  # Calculate global annual statistics
  annual_values <- annual_total_df$annual_precipitation
  annual_values <- annual_values[!is.na(annual_values)]
  
  cat("\n=== ANNUAL PRECIPITATION TOTAL ===\n")
  cat(sprintf("Global mean annual precipitation: %.1f mm\n", mean(annual_values)))
  cat(sprintf("Global median annual precipitation: %.1f mm\n", median(annual_values)))
  cat(sprintf("Global max annual precipitation: %.1f mm\n", max(annual_values)))
  cat(sprintf("Global min annual precipitation: %.1f mm\n", min(annual_values)))
  
  # Compare with global average (~1000 mm/year)
  global_climatology <- 1000
  diff_percent <- (mean(annual_values) - global_climatology) / global_climatology * 100
  cat(sprintf("Difference from global climatology (1000 mm): %.1f%%\n", diff_percent))
}
## 
## === ANNUAL PRECIPITATION TOTAL ===
## Global mean annual precipitation: 768.1 mm
## Global median annual precipitation: 485.4 mm
## Global max annual precipitation: 8415.6 mm
## Global min annual precipitation: 0.0 mm
## Difference from global climatology (1000 mm): -23.2%

Let’s take specific regions to study

# Analyze monthly totals for specific regions
analyze_region_totals <- function(region_name, bbox) {
  region_totals <- purrr::map_dfr(monthly_totals_list, function(month_data) {
    region_raster <- raster::crop(month_data$raster, bbox)
    region_raster <- raster::mask(region_raster, as(africa_sf, "Spatial"))
    
    values <- raster::values(region_raster)
    values <- values[!is.na(values)]
    
    data.frame(
      month = month_data$month_num,
      month_name = month_data$month_name,
      region = region_name,
      mean_total = mean(values, na.rm = TRUE),
      median_total = median(values, na.rm = TRUE)
    )
  })
  
  return(region_totals)
}

# Example: Compare different regions
regions <- list(
  "Tropics" = raster::extent(-180, 180, -23.5, 23.5),
  "Northern Hemisphere" = raster::extent(-180, 180, 23.5, 90),
  "Southern Hemisphere" = raster::extent(-180, 180, -90, -23.5)
)

region_analyses <- purrr::map2_df(names(regions), regions, analyze_region_totals)

# Plot regional comparisons
ggplot(region_analyses, aes(x = factor(month, labels = month.abb), 
                            y = mean_total, fill = region)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Monthly Precipitation Totals by Region - 2022",
       x = "Month", y = "Mean Monthly Total (mm)",
       fill = "Region") +
  theme_minimal() +
  theme(legend.position = "top")

Conclusion:

Key Advantages of NetCDF for Climate Data :

1 - Multi-dimensional Structure : NetCDF handles 4D data naturally: Time × Latitude × Longitude × Variable

2 - Self-describing Format : Each NetCDF file contains its own metadata

3 - Efficient Storage

Summary after visualization :

The precipitation are much higher in the tropics ( Brazil, Mexico, India, Indonesia, the Democratic Republic of Congo, and Australia) ; It rains a lot in the tropics because the equator receives more direct solar energy, which leads to more evaporation from oceans, soil, and vegetation. This warm, moist air rises, cools, and condenses into clouds and thunderstorms, causing heavy precipitation (up to 90mm per month)

For northen hemispherethe (entirety of North America and Europe, most of Asia, two-thirds of Africa, and a small portion of South America ), it generally receives more rain than the Southern Hemisphere due to its warmer temperatures and the role of ocean currents, such as the Atlantic Meridional Overturning Circulation (AMOC), in transporting heat and moisture. (mean 40mm per month)